import jieba.analyse as analyse
from textrank4zh import TextRank4Keyword


# Extract the top 100 keywords from the source text with two different
# algorithms (TextRank and TF-IDF) and write each result to its own file.

# Read the whole corpus as raw bytes; a context manager guarantees the file
# handle is closed even if reading fails.
# NOTE(review): decoding of the bytes is left to textrank4zh / jieba, as in
# the original script — confirm the file's encoding is one they accept.
with open(u'考研历年阅读素材1980-.txt', 'rb') as source_file:
    lines = source_file.read()

# TextRank-based extraction.
word = TextRank4Keyword()
word.analyze(lines, window=2, lower=True)
w_list = word.get_keywords(num=100, word_min_len=1)
with open("C:\\Users\\Administrator\\Desktop\\高频词汇T.txt", "w", encoding='utf-8') as f:
    # Each returned item exposes the keyword text as ``.word``.
    f.write("  ".join(w.word for w in w_list))

# TF-IDF-based extraction.
tfidf_keywords = analyse.extract_tags(
    lines, topK=100, withWeight=False, allowPOS=()
)
with open("C:\\Users\\Administrator\\Desktop\\高频词汇TF.txt", "w", encoding='utf-8') as f:
    f.write("  ".join(tfidf_keywords))
